* Patent breadth: calculate number of IPC (and CPC) codes for each patent

* Start from a clean session: drop whatever is currently held in memory
clear all
* Do not pause the output with --more-- prompts, so the script runs unattended
set more off 

* Work from the shared project folder; the dataset names given to the use and
* saveold commands further down are relative to this directory
cd "\\kant\sv-econ-felles\viten\federc"

* ----------------------------------------------
* Count number of IPC codes for each appln_id
* ----------------------------------------------
* Load only the two columns that are needed from tls209
use appln_id ipc_class_symbol using tls209, clear

/*
* Optional: restrict to a random sample of 100 rows while testing
generate random = runiform()
sort random
generate insample = _n <= 100 
keep if insample==1
*/

* Strip leading/trailing blanks so padded copies of a symbol compare equal
replace ipc_class_symbol = trim(ipc_class_symbol)

* Keep a single row for every (appln_id, ipc_class_symbol) pair
bysort appln_id ipc_class_symbol: keep if _n == 1

* Flag one row per distinct pair, then add the flags up within each application.
* NOTE(review): egen's tag() leaves rows with a missing/empty value untagged
* unless the missing option is given, so such rows contribute 0 to the count.
egen tag = tag(appln_id ipc_class_symbol)
collapse (sum) num_ipc = tag, by(appln_id)
// On average 3 IPC codes per appln_id

* Store one row per appln_id with its IPC count (saveold writes an older .dta format)
saveold ipc_codes, replace


* ----------------------------------------------
* Count number of CPC codes for each appln_id
* ----------------------------------------------
* Used if we want an alternative patent classification as a robustness check
* Load only the two columns that are needed from tls224
use appln_id cpc_class_symbol using tls224, clear

* Strip leading/trailing blanks so padded copies of a symbol compare equal
replace cpc_class_symbol = trim(cpc_class_symbol)

* Keep a single row for every (appln_id, cpc_class_symbol) pair
bysort appln_id cpc_class_symbol: keep if _n == 1

* Count the number of CPC codes for each patent application: flag one row per
* distinct pair, then add the flags up within each application.
* NOTE(review): egen's tag() leaves rows with a missing/empty value untagged
* unless the missing option is given, so such rows contribute 0 to the count.
egen tag = tag(appln_id cpc_class_symbol)
collapse (sum) num_cpc = tag, by(appln_id)
// On average 3.8 CPC codes per appln_id

* Store one row per appln_id with its CPC count (saveold writes an older .dta format)
saveold cpc_codes, replace
